Laad packages

Lees data in en beoordeel kwaliteit

# Read the semicolon-separated export (decimal comma) into a data frame.
# stringsAsFactors is set explicitly: since R 4.0.0 the default is FALSE,
# and the per-level counts printed by summary() below need factor columns.
myDF <- read.csv2("hra-data.csv", stringsAsFactors = TRUE)
# Show the first rows and a per-column summary to judge data quality.
head(myDF)
summary(myDF)
      Peildatum     Stam.nummer               Stromen          Naam      Man.Vrouw   
 31-01-2014:1744   Min.   :   1.0                 :4081   a      : 698   Man  :4023  
 31-01-2015:1934   1st Qu.: 319.5   -             :   4   o      : 400   Vrouw:4360  
 31-01-2016:2444   Median : 591.0   Doorstroom IN :2123   e      : 396               
 31-01-2017:2261   Mean   : 590.2   Doorstroom UIT:1705   h      : 396               
                   3rd Qu.: 852.0   Instroom      : 305   n      : 381               
                   Max.   :2465.0   Uitstroom     : 165   k      : 377               
                                                          (Other):5735               
    Geboortedatum     Leeftijd      Dienst.jaren     Organisatie            Organisatie.niveau.1
 04-04-1986:  19   Min.   :20.20   Min.   : 0.100   Banking:8383   Banking and Payment:6272     
 10-07-1954:  19   1st Qu.:37.85   1st Qu.: 2.700                  Corporate Services :2111     
 02-09-1963:  18   Median :47.50   Median : 6.200                                               
 03-05-1965:  18   Mean   :46.27   Mean   : 8.964                                               
 30-03-1976:  18   3rd Qu.:54.50   3rd Qu.:12.500                                               
 15-04-1953:  17   Max.   :71.20   Max.   :35.900                                               
 (Other)   :8274                                                                                
                Organisatie.niveau.2                Organisatie.niveau.3
 Business Support Services:1574      Business Support Services:1573     
 Private Banking          :1307      Domestic Markets         :1048     
 Domestic Markets         :1278      Large Accounts           : 848     
 Investments              :1213      National Investments     : 657     
 Securities               : 692      Agriculture Credit       : 560     
 Auditing                 : 595      Bank and Trusts          : 464     
 (Other)                  :1724      (Other)                  :3233     
                   Functie     Garantieschaal  Functieschaal        FTE           Type.Contract 
 Business Banker       :3197   Min.   : 2.00   Min.   : 1.00   Min.   :0.0250   Tijdelijk:1358  
 Consumer Banker       :1700   1st Qu.: 9.00   1st Qu.: 9.00   1st Qu.:0.6000   Vast     :7025  
 Administration officer: 556   Median :11.00   Median :11.00   Median :0.8000                   
 Investment officer    : 351   Mean   :10.42   Mean   :10.24   Mean   :0.7624                   
 Investment assistant  : 240   3rd Qu.:12.00   3rd Qu.:12.00   3rd Qu.:1.0000                   
 Business Broker       : 194   Max.   :18.00   Max.   :17.00   Max.   :1.0000                   
 (Other)               :2145                                                                    
 Type.Arbeidsovereenkomst  Hoogstgenoten.opleiding Aantal.ziekmeldingen Dagen.afwezig 
 D2:7027                               :1674       -      :3895         -      :3575  
 D3: 206                  Associate    : 981       1      :2195         3,00   : 218  
 D4:1150                  Bachelor     :2660       2      :1284         1,00   : 203  
                          Master       :2875       3      : 592         2,00   : 198  
                          Ph.D/Doctoral: 193       4      : 267         4,00   : 180  
                                                   5      :  93         6,00   : 158  
                                                   (Other):  57         (Other):3851  
 Leidinggevende Naam.leidingegevende Einddatum.Bepaalde.Tijd
 -      :2631   -      :2639         31-12-2099:6949        
 780    : 286   m      : 995         14-03-2015: 184        
 781    : 247   l      : 570         13-03-2016: 134        
 184    : 246   j      : 460         14-04-2015: 103        
 608    : 240   r      : 430         13-03-2017:  92        
 655    : 216   o      : 376         13-04-2016:  68        
 (Other):4517   (Other):2913         (Other)   : 853        

Verander data types, filter en voeg kolommen toe

# Parse the three day-month-year columns into Date objects.
myDF[c("Peildatum", "Geboortedatum", "Einddatum.Bepaalde.Tijd")] <- lapply(
  myDF[c("Peildatum", "Geboortedatum", "Einddatum.Bepaalde.Tijd")],
  as.Date,
  format = "%d-%m-%Y"
)

# Remove spaces from Stromen so blank entries become the empty string.
myDF$Stromen <- gsub(" ", "", myDF$Stromen)

# Drop the "-" placeholder, turn the decimal comma into a dot and convert to
# numeric; entries that end up empty become NA.
myDF$Aantal.ziekmeldingen <- as.numeric(
  gsub(",", ".", gsub("-", "", myDF$Aantal.ziekmeldingen))
)
myDF$Dagen.afwezig <- as.numeric(
  gsub(",", ".", gsub("-", "", myDF$Dagen.afwezig))
)

# Some people left the organisation earlier.
start.date <- as.Date("2016-01-31")

# Keep the 2017-01-31 snapshot rows whose contract end date lies after
# start.date and whose Stromen value is empty, then add the FTE-weighted
# day counts used for the sick-leave ratio.
myDF <- myDF %>%
  filter(
    Peildatum == "2017-01-31",
    Einddatum.Bepaalde.Tijd > start.date,
    Stromen == ""
  ) %>%
  mutate(
    FTE.x.werkdagen = FTE * 365,
    FTE.x.ziektedagen = FTE * Dagen.afwezig
  )
summary(myDF)
   Peildatum           Stam.nummer       Stromen               Naam     Man.Vrouw  
 Min.   :2017-01-31   Min.   :   1.0   Length:1064        a      : 93   Man  :498  
 1st Qu.:2017-01-31   1st Qu.: 342.8   Class :character   h      : 52   Vrouw:566  
 Median :2017-01-31   Median : 628.5   Mode  :character   o      : 52              
 Mean   :2017-01-31   Mean   : 627.1                      e      : 50              
 3rd Qu.:2017-01-31   3rd Qu.: 927.2                      d      : 49              
 Max.   :2017-01-31   Max.   :1230.0                      q      : 49              
                                                          (Other):719              
 Geboortedatum           Leeftijd      Dienst.jaren     Organisatie  
 Min.   :1945-11-01   Min.   :24.20   Min.   : 1.100   Banking:1064  
 1st Qu.:1960-10-26   1st Qu.:40.80   1st Qu.: 2.800                 
 Median :1966-12-06   Median :50.15   Median : 6.400                 
 Mean   :1968-07-20   Mean   :48.53   Mean   : 9.247                 
 3rd Qu.:1976-03-30   3rd Qu.:56.30   3rd Qu.:12.700                 
 Max.   :1992-12-06   Max.   :71.20   Max.   :35.900                 
                                                                     
          Organisatie.niveau.1                Organisatie.niveau.2
 Banking and Payment:811       Business Support Services:287      
 Corporate Services :253       Investments              :164      
                               Private Banking          :151      
                               Domestic Markets         :113      
                               Securities               : 82      
                               Auditing                 : 67      
                               (Other)                  :200      
                Organisatie.niveau.3                   Functie    Garantieschaal 
 Business Support Services:287       Business Banker       :362   Min.   : 2.00  
 Large Accounts           :100       Consumer Banker       :239   1st Qu.: 9.00  
 International Investments: 91       Administration officer: 84   Median :11.00  
 Domestic Markets         : 89       Investment officer    : 42   Mean   :10.45  
 Corporate Investments    : 64       Business Broker       : 36   3rd Qu.:12.00  
 Bank and Trusts          : 63       Investment assistant  : 35   Max.   :18.00  
 (Other)                  :370       (Other)               :266                  
 Functieschaal        FTE           Type.Contract Type.Arbeidsovereenkomst
 Min.   : 1.00   Min.   :0.0250   Tijdelijk:160   D2:904                  
 1st Qu.: 9.00   1st Qu.:0.6000   Vast     :904   D3: 23                  
 Median :11.00   Median :0.8000                   D4:137                  
 Mean   :10.25   Mean   :0.7629                                           
 3rd Qu.:12.00   3rd Qu.:1.0000                                           
 Max.   :17.00   Max.   :1.0000                                           
                                                                          
  Hoogstgenoten.opleiding Aantal.ziekmeldingen Dagen.afwezig    Leidinggevende
              :222        Min.   :1.000        Min.   :  0.00   -      :325   
 Associate    :126        1st Qu.:1.000        1st Qu.:  2.40   1175   : 37   
 Bachelor     :328        Median :1.000        Median :  5.60   184    : 33   
 Master       :362        Mean   :1.842        Mean   : 20.98   1179   : 30   
 Ph.D/Doctoral: 26        3rd Qu.:2.000        3rd Qu.: 14.00   1178   : 28   
                          Max.   :7.000        Max.   :380.52   780    : 27   
                          NA's   :501          NA's   :461      (Other):584   
 Naam.leidingegevende Einddatum.Bepaalde.Tijd FTE.x.werkdagen   FTE.x.ziektedagen
 -      :326          Min.   :2016-12-12      Min.   :  9.125   Min.   :  0.00   
 m      :131          1st Qu.:2099-12-31      1st Qu.:219.000   1st Qu.:  1.92   
 l      : 76          Median :2099-12-31      Median :292.000   Median :  4.48   
 a      : 65          Mean   :2086-11-23      Mean   :278.476   Mean   : 17.09   
 r      : 54          3rd Qu.:2099-12-31      3rd Qu.:365.000   3rd Qu.: 12.00   
 j      : 52          Max.   :2099-12-31      Max.   :365.000   Max.   :380.52   
 (Other):360                                                    NA's   :461      

Bereken ziekteverzuim

# Sick-leave ratio: summed FTE.x.ziektedagen divided by summed FTE.x.werkdagen.
# Rows with a missing FTE.x.ziektedagen are skipped in the numerator only.
tot.aant.ziektedagen <- with(myDF, sum(FTE.x.ziektedagen, na.rm = TRUE))
tot.aant.werkdagen <- with(myDF, sum(FTE.x.werkdagen))
ziekteverzuim <- tot.aant.ziektedagen / tot.aant.werkdagen
tot.aant.ziektedagen
[1] 10307.52
# Print the summed FTE.x.werkdagen (denominator of the sick-leave ratio).
tot.aant.werkdagen
[1] 296298.6
# Print the sick-leave ratio as a fraction (not a percentage).
ziekteverzuim
[1] 0.0347876

Visualiseer data

# Frequency polygon of Leeftijd (bin width 2), one line per Man.Vrouw level.
p1 <- ggplot(myDF, aes(x = Leeftijd, color = Man.Vrouw)) +
  geom_freqpoly(binwidth = 2)
ggplotly(p1, width = 800)
# Box plot of Leeftijd per Man.Vrouw level, filled by the same variable.
p2 <- ggplot(myDF, aes(x = Man.Vrouw, y = Leeftijd, fill = Man.Vrouw)) +
  geom_boxplot()
ggplotly(p2, width = 800)
# Scatter plot of Dagen.afwezig against Leeftijd, coloured by Man.Vrouw.
# The grouping is mapped to `color` because the default point shape ignores
# `fill`, which left every point the same colour.
p3 <- ggplot(myDF) +
  geom_point(aes(x = Leeftijd, y = Dagen.afwezig, color = Man.Vrouw))
ggplotly(p3, width = 800)
# Scatter plot of Dagen.afwezig per Organisatie.niveau.1, coloured by
# Type.Contract. The grouping is mapped to `color` because the default point
# shape ignores `fill`, which left every point the same colour.
p4 <- ggplot(myDF) +
  geom_point(
    aes(x = Organisatie.niveau.1, y = Dagen.afwezig, color = Type.Contract)
  )
ggplotly(p4, width = 800)
# Box plot of FTE per Man.Vrouw level, filled by the same variable.
# NOTE(review): this reuses the name p4 and so overwrites the previous plot
# object; consider a distinct name if both plots are needed later.
p4 <- ggplot(myDF, aes(x = Man.Vrouw, y = FTE, fill = Man.Vrouw)) +
  geom_boxplot()
ggplotly(p4, width = 800)

Multivariate analysis

# Row count with mean Leeftijd, and row count with mean Dagen.afwezig
# (missing values ignored), per Man.Vrouw level.
grpMV <- myDF %>% group_by(Man.Vrouw)
grpMV %>% summarize(Aantal = n(), Gem.Leeftijd = mean(Leeftijd))
grpMV %>%
  summarize(Aantal = n(), Gem.dgn.afw = mean(Dagen.afwezig, na.rm = TRUE))

# The same two summaries per Type.Contract level.
grpTC <- myDF %>% group_by(Type.Contract)
grpTC %>% summarize(Aantal = n(), Gem.Leeftijd = mean(Leeftijd))
grpTC %>%
  summarize(Aantal = n(), Gem.dgn.afw = mean(Dagen.afwezig, na.rm = TRUE))
# One-way ANOVA of Dagen.afwezig on Type.Contract; rows with a missing
# response are dropped by aov().
m1 <- aov(
  Dagen.afwezig ~ Type.Contract,
  data = myDF
)
summary(m1)
               Df  Sum Sq Mean Sq F value Pr(>F)
Type.Contract   1     705   704.7   0.352  0.553
Residuals     601 1202574  2001.0               
461 observations deleted due to missingness
# One-way ANOVA of Dagen.afwezig on Organisatie.niveau.1.
m2 <- aov(
  Dagen.afwezig ~ Organisatie.niveau.1,
  data = myDF
)
summary(m2)
                      Df  Sum Sq Mean Sq F value Pr(>F)
Organisatie.niveau.1   1     261   261.2    0.13  0.718
Residuals            601 1203018  2001.7               
461 observations deleted due to missingness
# One-way ANOVA of FTE on Man.Vrouw.
m3 <- aov(
  FTE ~ Man.Vrouw,
  data = myDF
)
summary(m3)
              Df Sum Sq Mean Sq F value Pr(>F)    
Man.Vrouw      1   5.61   5.608   105.5 <2e-16 ***
Residuals   1062  56.43   0.053                   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Plot the Tukey HSD intervals for m3 at a 99% family-wise confidence level,
# with horizontal axis labels and drawn in red.
plot(
  TukeyHSD(m3, conf.level = 0.99),
  las = 1,
  col = "red"
)

LS0tCnRpdGxlOiAiSFIgQW5hbHl0aWNzIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIyMgTGFhZCBwYWNrYWdlcwoKYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRSwgaW5jbHVkZT1GQUxTRSwgcGFnZWQucHJpbnQ9RkFMU0V9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHBsb3RseSkKYGBgCgojIyMgTGVlcyBkYXRhIGluIGVuIGJlb29yZGVlbCBrd2FsaXRlaXQKCmBgYHtyfQpteURGIDwtIHJlYWQuY3N2MigiaHJhLWRhdGEuY3N2IikKaGVhZChteURGKQpzdW1tYXJ5KG15REYpCmBgYAoKIyMgVmVyYW5kZXIgZGF0YSB0eXBlcywgZmlsdGVyIGVuIHZvZWcga29sb21tZW4gdG9lCgpgYGB7cn0KbXlERiRQZWlsZGF0dW0gPC0gYXMuRGF0ZShteURGJFBlaWxkYXR1bSwgZm9ybWF0PSIlZC0lbS0lWSIpCm15REYkR2Vib29ydGVkYXR1bSA8LSBhcy5EYXRlKG15REYkR2Vib29ydGVkYXR1bSwgZm9ybWF0PSIlZC0lbS0lWSIpCm15REYkRWluZGRhdHVtLkJlcGFhbGRlLlRpamQgPC0gYXMuRGF0ZShteURGJEVpbmRkYXR1bS5CZXBhYWxkZS5UaWpkLCBmb3JtYXQ9IiVkLSVtLSVZIikKCm15REYkU3Ryb21lbiA8LSBnc3ViKCIgIiwgIiIsIG15REYkU3Ryb21lbikKbXlERiRBYW50YWwuemlla21lbGRpbmdlbiA8LSBnc3ViKCItIiwgIiIsIG15REYkQWFudGFsLnppZWttZWxkaW5nZW4pCm15REYkQWFudGFsLnppZWttZWxkaW5nZW4gPC0gYXMubnVtZXJpYyhnc3ViKCIsIiwgIi4iLCBteURGJEFhbnRhbC56aWVrbWVsZGluZ2VuKSkKbXlERiREYWdlbi5hZndlemlnIDwtIGdzdWIoIi0iLCAiIiwgbXlERiREYWdlbi5hZndlemlnKQpteURGJERhZ2VuLmFmd2V6aWcgPC0gYXMubnVtZXJpYyhnc3ViKCIsIiwgIi4iLCBteURGJERhZ2VuLmFmd2V6aWcpKQoKCiMgIyBTb21taWdlIHBlcnNvbmVuIGhlYmJlbiBkZSBvcmdhbmlzYXRpZSBlZXJkZXIgdmVybGF0ZW4Kc3RhcnQuZGF0ZSA8LSBhcy5EYXRlKCIyMDE2LTAxLTMxIikKbXlERiA8LSBteURGICU+JSBmaWx0ZXIoUGVpbGRhdHVtID09ICIyMDE3LTAxLTMxIiwgRWluZGRhdHVtLkJlcGFhbGRlLlRpamQgPiBzdGFydC5kYXRlLCBTdHJvbWVuID09ICIiKQoKbXlERiA8LSBtdXRhdGUobXlERiwgRlRFLngud2Vya2RhZ2VuPUZURSozNjUsIEZURS54LnppZWt0ZWRhZ2VuPUZURSpEYWdlbi5hZndlemlnKQpzdW1tYXJ5KG15REYpCmBgYAoKIyMjIEJlcmVrZW4gemlla3RldmVyenVpbQpgYGB7cn0KdG90LmFhbnQuemlla3RlZGFnZW4gPC0gc3VtKG15REYkRlRFLnguemlla3RlZGFnZW4sIG5hLnJtID0gVFJVRSkKdG90LmFhbnQud2Vya2RhZ2VuIDwtIHN1bShteURGJEZURS54LndlcmtkYWdlbikKemlla3RldmVyenVpbSA9IHRvdC5hYW50LnppZWt0ZWRhZ2VuIC8gdG90LmFhbnQud2Vya2RhZ2VuCgp0b3QuYWFudC56aWVrdGVkYWdlbgp0b3QuYWFudC53ZXJrZGFnZW4Kemlla3RldmVyenVpbQoKYGBgCgojIyMgVmlzdWFsaXNlZXIgZGF0YQoKYGBge3J9CgpwMSA8
LSBnZ3Bsb3QobXlERikgKwogIGdlb21fZnJlcXBvbHkoYWVzKHg9TGVlZnRpamQsIGNvbG9yID0gTWFuLlZyb3V3KSwgYmlud2lkdGggPSAyKQpnZ3Bsb3RseShwMSwgd2lkdGggPSA4MDApCmBgYAoKYGBge3J9CnAyIDwtIGdncGxvdChteURGKSArCiAgZ2VvbV9ib3hwbG90KGFlcyh4PU1hbi5Wcm91dywgeT1MZWVmdGlqZCwgZmlsbD1NYW4uVnJvdXcpKQpnZ3Bsb3RseShwMiwgd2lkdGggPSA4MDApCgpgYGAKCmBgYHtyfQpwMyA8LSBnZ3Bsb3QobXlERikgKwogIGdlb21fcG9pbnQoYWVzKHg9TGVlZnRpamQsIHk9RGFnZW4uYWZ3ZXppZywgZmlsbD1NYW4uVnJvdXcpKQpnZ3Bsb3RseShwMywgd2lkdGggPSA4MDApCmBgYAoKYGBge3J9CnA0IDwtIGdncGxvdChteURGKSArCiAgZ2VvbV9wb2ludChhZXMoeD1PcmdhbmlzYXRpZS5uaXZlYXUuMSAsIHk9RGFnZW4uYWZ3ZXppZywgZmlsbD1UeXBlLkNvbnRyYWN0KSkKZ2dwbG90bHkocDQsIHdpZHRoID0gODAwKQpgYGAKCmBgYHtyfQpwNCA8LSBnZ3Bsb3QobXlERikgKwogIGdlb21fYm94cGxvdChhZXMoeD1NYW4uVnJvdXcgLCB5PUZURSwgZmlsbD1NYW4uVnJvdXcpKQpnZ3Bsb3RseShwNCwgd2lkdGggPSA4MDApCmBgYAoKIyMjIE11bHRpdmFyaWF0ZSBhbmFseXNpcwoKYGBge3J9CmdycE1WIDwtIGdyb3VwX2J5KG15REYsIE1hbi5Wcm91dykKc3VtbWFyaXplKGdycE1WLCBBYW50YWwgPSBuKCksIEdlbS5MZWVmdGlqZCA9IG1lYW4oTGVlZnRpamQpKQpzdW1tYXJpemUoZ3JwTVYsIEFhbnRhbCA9IG4oKSwgR2VtLmRnbi5hZncgPSBtZWFuKERhZ2VuLmFmd2V6aWcsIG5hLnJtID0gVFJVRSkpCmBgYAoKYGBge3J9CmdycFRDIDwtIGdyb3VwX2J5KG15REYsIFR5cGUuQ29udHJhY3QpCnN1bW1hcml6ZShncnBUQywgQWFudGFsID0gbigpLCBHZW0uTGVlZnRpamQgPSBtZWFuKExlZWZ0aWpkKSkKc3VtbWFyaXplKGdycFRDLCBBYW50YWwgPSBuKCksIEdlbS5kZ24uYWZ3ID0gbWVhbihEYWdlbi5hZndlemlnLCBuYS5ybSA9IFRSVUUpKQpgYGAKCmBgYHtyfQptMTwtIGFvdihEYWdlbi5hZndlemlnIH4gVHlwZS5Db250cmFjdCwgZGF0YSA9IG15REYpCnN1bW1hcnkobTEpCgptMjwtIGFvdihEYWdlbi5hZndlemlnIH4gT3JnYW5pc2F0aWUubml2ZWF1LjEsIGRhdGEgPSBteURGKQpzdW1tYXJ5KG0yKQoKbTM8LSBhb3YoRlRFIH4gTWFuLlZyb3V3LCBkYXRhID0gbXlERikKc3VtbWFyeShtMykKCnBsb3QoVHVrZXlIU0QobTMsIGNvbmYubGV2ZWwgPSAwLjk5KSxsYXM9MSwgY29sID0gInJlZCIpCgpgYGAKCg==